#@title
# ---------- setup ---------- #
# !pip install geopandas
from google.colab import drive
import pandas as pd
import numpy as np
import altair as alt
import os
import geopandas as gpd
import json
# suppress SettingWithCopy warning
pd.set_option('mode.chained_assignment', None)
# mount drive
DRIVE_MOUNT_POINT = '/content/gdrive/'
drive.mount(DRIVE_MOUNT_POINT, force_remount = True)
# clean data folder, anchored at the mount point so that it resolves
# regardless of the notebook's current working directory
clean_folder = os.path.join(DRIVE_MOUNT_POINT,
                            'My Drive/capp-30239-project/process-data/clean-data')
# ---------- theme ---------- #
def my_theme(font = 'Gill Sans'):
    """
    Returns the Altair theme configuration used by every chart in the notebook.

    Parameters:
        font: font family applied to titles, subtitles, axes and legends.
              Defaults to 'Gill Sans', so calling the theme with no arguments
              yields the notebook's standard look.

    Returns:
        A dict with a single 'config' key holding the view size, title, axis,
        legend and categorical color-range settings.
    """
    # categorical palette, in the order colors are assigned to categories
    main_palette = ['#a4201d',
                    '#e4ceae',
                    '#f1aaa9',
                    '#9d9d9d',
                    '#cd8e35',
                    '#0a4c6a']
    return {'config': {'background': '#ffffff',
                       'view': {'height': 400,
                                'width': 800,
                                'strokeWidth': 0},
                       'title': {'anchor': 'start',
                                 'fontSize': 24,
                                 'font': font,
                                 'subtitleFont': font},
                       'axis': {'titleFont': font,
                                'labelFont': font,
                                'titleFontSize': 16,
                                'labelFontSize': 14,
                                'titlePadding': 20,
                                'labelPadding': 5,
                                'labelLimit': 500},
                       'legend': {'titleFont': font,
                                  'labelFont': font,
                                  'titleFontSize': 16,
                                  'labelFontSize': 16,
                                  'labelLimit': 500,
                                  'padding': 10,
                                  'strokeColor': '#d3d3d3',
                                  'fillColor': '#ffffff'},
                       'range': {'category': main_palette}}}
# register the custom theme with Altair under a name, then make it the active theme
THEME_NAME = 'my_theme'
alt.themes.register(THEME_NAME, my_theme)
alt.themes.enable(THEME_NAME)
Vaccinations are extremely important. The Centers for Disease Control and Prevention (CDC), for example, included vaccinations on its list of the ten greatest public health achievements of the 20th century – noting that vaccines prevent 2-3 million deaths each year. Polio provides one of the starkest examples of the power of vaccinations. As shown below, the disease has been effectively eradicated in the span of just 40 years.
This tremendous public health feat can undoubtedly be attributed to the widespread adoption of the Polio vaccine. Although Jonas Salk discovered the vaccine in 1955, its utilization in other parts of the world took off in the 1980s, as shown below. In Southeast Asia – the region with the most cases of Polio – the vaccination coverage rate grew from below 20% at the beginning of the decade to nearly 80% at the end.
#@title
# ---------- cases ---------- #
# load the cleaned incidence table
incidence_df = pd.read_csv(os.path.join(clean_folder, 'global-incidence.csv'))
# sum incidences within each WHO region / disease / year
incidence_groups = incidence_df.groupby(['WHO_REGION', 'Disease', 'year'])
incidence_agg = incidence_groups.agg(total_incidences = ('incidences', 'sum')).reset_index()
# keep the polio rows only
is_polio = incidence_agg['Disease'] == 'polio'
incidence_polio = incidence_agg[is_polio]
# area chart of total polio cases per year, colored by WHO region;
# the title set here describes both this chart and the coverage chart
polio_title = {'text': ["Polio's Eradication Paralleled the Sharp Rise in Its Vaccination Coverage"],
               'subtitle': ['Paralytic Polio Cases & Third Dose Polio (Pol3) Vaccination Coverage by WHO Region, 1980-2018'],
               'subtitleFontSize': 20,
               'subtitleFont': 'Gill Sans'}
chart1 = (alt.Chart(incidence_polio)
          .mark_area()
          .encode(x = alt.X('year:N', title = None),
                  y = alt.Y('total_incidences:Q', title = 'Total Number of Polio Cases'),
                  color = alt.Color('WHO_REGION:N', legend = alt.Legend(title = None)))
          .properties(title = polio_title))
# ---------- coverage ---------- #
# load the cleaned WUENIC coverage table
wuenic_df = pd.read_csv(os.path.join(clean_folder, 'wuenic.csv'))
# unweighted mean of 'coverage' within each region / vaccine / year
# (every row counts equally, so each country gets equal weight)
coverage_groups = wuenic_df.groupby(['Region', 'Vaccine', 'year'])
wuenic_agg = coverage_groups.agg(avg_coverage = ('coverage', 'mean')).reset_index()
# keep the Pol3 rows and drop any row with a missing value
is_pol3 = wuenic_agg['Vaccine'] == 'Pol3'
wuenic_polio = wuenic_agg[is_pol3].dropna()
# line chart of average coverage per year, one line per region
chart2 = (alt.Chart(wuenic_polio)
          .mark_line()
          .encode(x = alt.X('year:N', title = None),
                  y = alt.Y('avg_coverage:Q', title = 'Average Vaccination Coverage (%)'),
                  color = alt.Color('Region', legend = alt.Legend(title = None))))
# ---------- combine ---------- #
# stack the cases chart above the coverage chart, then set the legend position
# and view height at the top level of the concatenated chart
polio_stack = alt.vconcat(chart1, chart2)
polio_stack = polio_stack.configure_legend(orient = 'top-right')
polio_stack.configure_view(height = 250)
Data Source (Top): WHO Reported Disease Incidence – Table 3.1
Data Source (Bottom): WHO / UNICEF Estimates of National Immunization Coverage – Table 4.6
Despite the incredible public health value of vaccinations, the past several years have seen a rise in distrust towards vaccines. In fact, in 2019, the WHO included vaccine hesitancy – or the reluctance or refusal to vaccinate despite the availability of vaccines – on its list of the ten greatest threats to global health. One of the clearest examples of the consequences of vaccine hesitancy in the U.S. is the recent outbreaks of measles – a disease that was declared eliminated in the U.S. twenty years ago.
The 2014 spike can largely be attributed to outbreaks in Ohio Amish country and California's Disneyland outbreak. The 2019 spike primarily reflects the outbreak in ultra-Orthodox Jewish communities in Brooklyn.
#@title
# ---------- wrangle ---------- #
# import cleaned data
incidence_df = pd.read_csv(os.path.join(clean_folder, 'global-incidence.csv'))
# limit to US measles rows from 1995 onward; dropna removes rows with any missing value
us_measles = incidence_df[(incidence_df['ISO_code'] == 'USA') &
                          (incidence_df['Disease'] == 'measles') &
                          (incidence_df['year'] >= 1995)].dropna()
# add 2018 / 2019 data from CDC - https://www.cdc.gov/measles/cases-outbreaks.html
# (pd.concat is used because DataFrame.append was removed in pandas 2.0;
# every column other than year / incidences is left missing for these two rows)
cdc_rows = pd.DataFrame({'year': [2018, 2019], 'incidences': [375, 1282]})
us_measles = pd.concat([us_measles, cdc_rows], ignore_index = True)
# format labels
us_measles['label'] = us_measles['incidences'].astype(int).astype(str) + ' cases'
# ---------- chart ---------- #
# The figure is four layers: yearly case bars, a rule at the year 2000,
# case-count labels for 2014 and 2019, and the annotation text for the rule.

# single-row frame: rule position (x), annotation height (y) and annotation text
vertical_line_df = pd.DataFrame({'x': [2000], 'y': [1300]})
vertical_line_df['label'] = 'The CDC officially declared measles eliminated in the U.S. in 2000'
# create vertical line
vertical_line = alt.Chart(vertical_line_df).mark_rule(
    color = '#a4201d',
    size = 2
).encode(
    x = 'x:N')
# add labels (cases) - only the 2014 and 2019 bars are labelled
text = alt.Chart(us_measles).mark_text(
    dy = -10,
    fontSize = 16,
    font = 'Gill Sans'
).encode(
    x = 'year:N',
    y = 'incidences:Q',
    text = 'label:N'
).transform_filter(
    (alt.datum.year == 2019) | (alt.datum.year == 2014))
# add more labels (annotation), offset to the right of the rule
more_text = alt.Chart(vertical_line_df).mark_text(
    dx = 10,
    dy = -12,
    fontSize = 18,
    align = 'left',
    font = 'Gill Sans'
).encode(
    x = 'x:N',
    y = 'y:Q',
    text = 'label:N')
# create chart (title spelling corrected: 'Becoming')
chart = alt.Chart(us_measles).mark_bar(
    color = '#0a4c6a'
).encode(
    x = alt.X('year:N', title = None),
    y = alt.Y('incidences:Q',
              title = 'Total Number of Measles Cases',
              scale = alt.Scale(domain = [0, 1400]))
).properties(
    title = {'text': ['Measles Outbreaks are Becoming Increasingly Common in the U.S.'],
             'subtitle': ['Total Number of Measles Cases in the U.S., 1995-2019'],
             'subtitleFontSize': 20,
             'subtitleFont': 'Gill Sans'})
chart + vertical_line + text + more_text
Data Sources: WHO Reported Disease Incidence – Table 3.1 and CDC Measles Cases and Outbreaks Update
The rise of measles outbreaks undoubtedly reflects declining vaccination rates – specifically for the Measles, Mumps, and Rubella (MMR) vaccine. The WHO recommends that between 93 and 95% of the population receive the MMR vaccine to achieve herd immunity (i.e., where the number of secondary infections generated by each person is less than 1). Considering the average across all kindergarteners in each state, 14 states currently have MMR vaccination coverage rates below this threshold.
#@title
# ---------- wrangle ---------- #
# import cleaned data
cdc_coverage_df = pd.read_csv(os.path.join(clean_folder, 'cdc-coverage.csv'))
# import states geojson
states = gpd.read_file(os.path.join(clean_folder, 'us-states.json'))
# merge datasets (state geometry joined to coverage rows by state name)
merged = states.merge(cdc_coverage_df, left_on = 'NAME', right_on = 'state')
# limit to MMR; .copy() makes the column assignments below act on an
# independent frame instead of a slice of the merge result
merged = merged[merged['vaccine'] == 'MMR'].copy()
# create binary herd immunity variable
merged['below-herd'] = np.where(merged['cov_18-19'] < 93,
                                'Below 93% coverage',
                                'Above 93% coverage')
# get centroids for labels
merged['centroid_lon'] = merged['geometry'].centroid.x
merged['centroid_lat'] = merged['geometry'].centroid.y
# New Hampshire and Hawaii get their own label layer (different color / offset)
offset_states = ['New Hampshire', 'Hawaii']
# format labels for the remaining below-threshold states
below = merged[(merged['below-herd'] == 'Below 93% coverage') &
               ~merged['NAME'].isin(offset_states)].copy()
below['label'] = below['cov_18-19'].astype(int).astype(str) + '%'
# format labels separately for New Hampshire and Hawaii, with hand-set label positions
others = merged[merged['NAME'].isin(offset_states)].copy()
others.loc[others['NAME'] == 'New Hampshire', 'centroid_lon'] = -69
others.loc[others['NAME'] == 'New Hampshire', 'centroid_lat'] = 43
others.loc[others['NAME'] == 'Hawaii', 'centroid_lon'] = -157.5
others.loc[others['NAME'] == 'Hawaii', 'centroid_lat'] = 20
others['label'] = others['cov_18-19'].astype(int).astype(str) + '%'
# ---------- chart ---------- #
def _coverage_labels(frame, text_color):
    """Text layer drawing each row's 'label' at its centroid_lon / centroid_lat."""
    return alt.Chart(frame).mark_text(
        color = text_color,
        font = 'Gill Sans'
    ).encode(
        longitude = 'centroid_lon:Q',
        latitude = 'centroid_lat:Q',
        text = 'label',
        size = alt.value(12.5))

# white labels for the below-threshold states
labels = _coverage_labels(below, '#ffffff')
# black labels for New Hampshire and Hawaii
more_labels = _coverage_labels(others, '#000000')
# two-color choropleth of the below / above threshold flag
herd_color = alt.Color('below-herd:N',
                       legend = alt.Legend(title = None,
                                           orient = 'none',
                                           legendX = 650,
                                           legendY = 270),
                       scale = alt.Scale(range = ['#e4ceae', '#a4201d']))
state_map_title = {'text': ['14 States Have MMR Coverage Below the Herd Immunity Threshold'],
                   'subtitle': ['MMR Kindergarten Vaccination Coverage, 2018-2019 School Year'],
                   'subtitleFontSize': 20,
                   'subtitleFont': 'Gill Sans'}
state_map = alt.Chart(merged).mark_geoshape(
    stroke = '#ffffff',
    strokeWidth = 0.5
).encode(
    color = herd_color
).properties(
    projection = {'type': 'albersUsa'},
    title = state_map_title)
# layer the two label sets over the map
state_map + labels + more_labels
Data Source: CDC School Year Vaccination Coverage Trend Report
In 2018, the Wellcome Global Monitor conducted a survey to explore how people around the world think and feel about science and health challenges. They asked individuals to respond on a 5-point Likert scale to three questions relevant to attitudes towards vaccines – individuals were asked about the degree to which they agree that vaccines are effective, important, and safe.
Americans largely agreed that vaccines are indeed effective, important, and safe. However, differences across demographic groups are noteworthy – older, less educated, more rural, and more religious Americans were less likely to believe that vaccines are safe. Attitudes towards vaccinations across income groups followed a U-shaped curve (i.e., with the poorest and richest quantiles being most likely to harbor vaccine hesitancy).
#@title
# ---------- wrangle ---------- #
def transform_likert(row):
    """
    Returns (start_pct, end_pct) for each value along the likert scale.

    Agreement categories extend right from 0 and disagreement categories
    extend left from 0; within each side the "somewhat" category sits next
    to 0 and the "strongly" category is stacked beyond it. Any other
    'likert' value gets a bar from 0 to the combined "Don't know/Refused"
    and "Neither agree nor disagree" share.

    Parameters:
        row: mapping (dict or pandas Series) with a 'likert' entry naming the
             row's category, plus one numeric entry per category name.

    Returns:
        A (start, end) tuple. A category whose entry is absent or NaN is
        counted as 0, so it cannot turn the bounds into NaN.
    """
    def share(category):
        # a category missing from the pivot (absent key or NaN) contributes 0
        value = row.get(category, 0)
        return 0 if pd.isna(value) else value

    response = row['likert']
    if response == 'Strongly agree':
        return (share('Somewhat agree'), share('Strongly agree') + share('Somewhat agree'))
    elif response == 'Somewhat agree':
        return (0, share('Somewhat agree'))
    elif response == 'Somewhat disagree':
        return (-share('Somewhat disagree'), 0)
    elif response == 'Strongly disagree':
        return (-share('Somewhat disagree') - share('Strongly disagree'), -share('Somewhat disagree'))
    else:
        return (0, share("Don't know/Refused") + share('Neither agree nor disagree'))
# load the cleaned survey table
wgm_df = pd.read_csv(os.path.join(clean_folder, 'wgm-survey.csv'))
# rescale 'pct' by 100
wgm_df['pct'] = wgm_df['pct'].mul(100)
# pivot to one column per likert category, so every response row can see the
# shares of all categories for its country / question / group
id_cols = ['country', 'question', 'group']
wgm_reshaped = wgm_df.pivot_table(index = id_cols,
                                  columns = 'likert',
                                  values = 'pct').reset_index()
merged = wgm_df.merge(wgm_reshaped, on = id_cols)
# compute each row's (start, end) bar bounds and split the pair into two columns
merged['bounds'] = merged.apply(transform_likert, axis = 1)
bounds_frame = pd.DataFrame(merged['bounds'].tolist())
merged[['pct_start', 'pct_end']] = bounds_frame
# keep only the columns the charts use
keep_cols = ['country', 'question', 'likert', 'pct', 'group', 'pct_start', 'pct_end']
merged = merged[keep_cols]
# ---------- national ---------- #
# U.S. national rows, restricted to the four agree / disagree categories
agree_disagree = ['Strongly agree',
                  'Somewhat agree',
                  'Somewhat disagree',
                  'Strongly disagree']
is_us = merged['country'] == 'United States'
is_national = merged['group'] == 'National'
limit1 = merged[is_us & is_national & merged['likert'].isin(agree_disagree)]
# light rule at x = 0, the divide between disagreement and agreement
zero_df = pd.DataFrame({'x': [0]})
vertical_line = alt.Chart(zero_df).mark_rule(
    color = '#f5f5f5',
    size = 2
).encode(
    x = 'x:Q')
# diverging bars: one row per question, each bar spanning pct_start to pct_end
national_color = alt.Color('likert:N',
                           legend = alt.Legend(title = None,
                                               orient = 'none',
                                               legendX = 365,
                                               legendY = 0),
                           scale = alt.Scale(domain = ["Strongly disagree",
                                                       "Somewhat disagree",
                                                       "Somewhat agree",
                                                       "Strongly agree"],
                                             range = ['#a4201d', '#f1aaa9', '#e4ceae', '#cd8e35']))
chart1 = alt.Chart(limit1).mark_bar().encode(
    x = alt.X('pct_start:Q', title = 'Percent', axis = alt.Axis(titlePadding = 5)),
    x2 = alt.X2('pct_end:Q'),
    y = alt.Y('question:N', title = None, axis = alt.Axis(labelFontSize = 16)),
    color = national_color
).properties(
    height = 70)
# layer the zero rule over the bars
combined1 = chart1 + vertical_line
# ---------- groups ---------- #
# limit to U.S. demographic groups' agree / disagree answers on vaccine safety
limit2 = merged[(merged['country'] == 'United States') &
                (merged['question'] == 'Vaccines are safe') &
                (merged['group'] != 'National') &
                (merged['likert'].isin(['Strongly agree',
                                        'Somewhat agree',
                                        'Somewhat disagree',
                                        'Strongly disagree']))]
# add group labels: rows that carry only a 'group' value, so each heading gets
# its own (empty) line on the y axis
# (pd.concat is used because DataFrame.append was removed in pandas 2.0)
group_headers = pd.DataFrame({'group': ['GENDER',
                                        'AGE COHORT',
                                        'EDUCATIONAL BACKGROUND',
                                        'AREA TYPE',
                                        'RELIGIOUS IDENTITY',
                                        'PER CAPITA INCOME']})
limit2 = pd.concat([limit2, group_headers], ignore_index = True)
# create bar plot; the explicit sort places each heading above its members
chart2 = alt.Chart(limit2).mark_bar(
).encode(
    x = alt.X('pct_start:Q', title = 'Percent', axis = alt.Axis(titlePadding = 5)),
    x2 = alt.X2('pct_end:Q'),
    y = alt.Y('group:N', title = None,
              sort = ['GENDER', 'Male', 'Female',
                      'AGE COHORT', 'Age 15-29', 'Age 30-49', 'Age 50+',
                      'EDUCATIONAL BACKGROUND',
                      'Elementary education or less (0-8 years of formal education)',
                      'Secondary education, (9-15 years of formal education)',
                      'College level education, (Beyond 15 years of formal education)',
                      'AREA TYPE', 'Rural area or small town', 'City or suburb of city',
                      'RELIGIOUS IDENTITY', 'Religious', 'Secular, non-religious',
                      'PER CAPITA INCOME', 'Poorest 20%', 'Second 20%', 'Third 20%', 'Fourth 20%', 'Richest 20%']),
    color = alt.Color('likert:N',
                      legend = alt.Legend(title = None),
                      scale = alt.Scale(domain = ["Strongly disagree",
                                                  "Somewhat disagree",
                                                  "Somewhat agree",
                                                  "Strongly agree"],
                                        range = ['#a4201d', '#f1aaa9', '#e4ceae', '#cd8e35']))
).properties(
    title = {'text': ['... But Not All Demographic Groups Believe they are Equally Safe'],
             'subtitle': ['Do you strongly or somewhat agree, or strongly or somewhat disagree that vaccines are safe?'],
             'subtitleFontSize': 20,
             'subtitleFont': 'Gill Sans'},
    height = 500)
# combine
combined2 = (chart2 + vertical_line)
# ---------- combine ---------- #
# national panel on top, demographic-group panel below, under one overall title
survey_title = {'text': ['Most Americans Believe Vaccines are Effective, Important, and Safe'],
                'subtitle': ['Do you strongly or somewhat agree, or strongly or somewhat disagree with the following statement?'],
                'subtitleFontSize': 20,
                'subtitleFont': 'Gill Sans'}
survey_stack = alt.vconcat(combined1, combined2)
survey_stack = survey_stack.configure_axis(labelFontSize = 15)
survey_stack = survey_stack.properties(title = survey_title)
survey_stack.configure_view(width = 350)
Data Source: Wellcome Global Monitor 2018 Survey
The trends captured in the survey data above also manifest in actual vaccination coverage. As shown below, low-income groups (those below the federal poverty line) are vaccinated at rates roughly 10% lower than those living above the poverty line. Perhaps more concerningly, this trend seems to be growing in recent years (i.e., the difference in coverage between wealthier and poorer individuals is diverging).
#@title
# ---------- wrangle ---------- #
# load the cleaned CDC table
cdc_031_df = pd.read_csv(os.path.join(clean_folder, 'cdc-031.csv'))
# keep the two poverty groups plus the 'All' rows, for the combined 7-vaccine series only
poverty_groups = ['Below poverty level',
                  'At or above poverty level',
                  'All']
in_groups = cdc_031_df['group'].isin(poverty_groups)
is_combined_series = cdc_031_df['vaccine'] == 'Combined 7-vaccine series'
cdc_031_df = cdc_031_df[in_groups & is_combined_series]
# relabel the 'All' rows for the legend
is_all = cdc_031_df['group'] == 'All'
cdc_031_df.loc[is_all, 'group'] = 'National average'
# ---------- chart ---------- #
# line chart (with point markers) of coverage per year, one line per group
poverty_color = alt.Color('group',
                          legend = alt.Legend(title = None,
                                              orient = 'bottom-right'),
                          scale = alt.Scale(range = ['#cd8e35', '#0a4c6a', '#9d9d9d']))
poverty_title = {'text': ['Low-Income Groups are Vaccinated at Lower (and Declining) Rates'],
                 'subtitle': ['Combined 7-Vaccine Series Coverage Rate Among Children Aged 19-35 Months by Poverty Level, 2009-2017'],
                 'subtitleFontSize': 20,
                 'subtitleFont': 'Gill Sans'}
poverty_lines = alt.Chart(cdc_031_df).mark_line(point = True).encode(
    x = alt.X('year:N', title = None),
    y = alt.Y('coverage:Q', title = 'Vaccination Coverage (%)'),
    color = poverty_color)
poverty_lines.properties(title = poverty_title)
Data Source: CDC Table 031
California's vaccination trends have garnered particular attention in recent years – making the state a particularly useful case study. After the 2014 Disneyland measles outbreak where 159 individuals contracted the measles after a trip to the theme park, the state took a series of policy steps to address its concerningly low vaccination rates.
As shown below, between 2014 and 2016, California's average vaccination rate indeed rose sharply – although as shown by the map, several counties (mostly in northern California) still have MMR coverage rates below 85%. As noted earlier, herd immunity for the measles requires coverage between 93 and 95%.
#@title
# ---------- trend chart ---------- #
# load the cleaned CDC coverage table
cdc_coverage_df = pd.read_csv(os.path.join(clean_folder, 'cdc-coverage.csv'))
# MMR rows for California and the 'Median' row
is_ca_or_median = cdc_coverage_df['state'].isin(['California', 'Median'])
is_mmr = cdc_coverage_df['vaccine'] == 'MMR'
limited = cdc_coverage_df[is_ca_or_median & is_mmr]
# wide -> long: one row per state and school-year column
coverage_cols = ['cov_11-12',
                 'cov_12-13',
                 'cov_13-14',
                 'cov_14-15',
                 'cov_15-16',
                 'cov_16-17',
                 'cov_17-18',
                 'cov_18-19']
reshaped = limited.melt(id_vars = ['state'],
                        value_vars = coverage_cols,
                        var_name = 'school-year',
                        value_name = 'coverage')
# label each school year by its starting calendar year, e.g. 'cov_11-12' -> '2011'
year_labels = {col: '20' + col[4:6] for col in coverage_cols}
reshaped['school-year'] = reshaped['school-year'].replace(year_labels)
# show the 'Median' row under a friendlier legend name
reshaped['state'] = reshaped['state'].replace({'Median': 'National average'})
# line chart (with point markers), y axis restricted to 90-100
trend_color = alt.Color('state:N',
                        legend = alt.Legend(title = None,
                                            orient = 'top-left'),
                        scale = alt.Scale(range = ['#a4201d', '#9d9d9d']))
trend_title = {'text': [''],
               'fontSize': 5,
               'subtitle': ['MMR Coverage, 2011-2018 School Years'],
               'subtitleFontSize': 20,
               'subtitleFont': 'Gill Sans'}
chart = alt.Chart(reshaped).mark_line(point = True).encode(
    x = alt.X('school-year:N', title = None),
    y = alt.Y('coverage:Q', title = 'MMR Vaccination Coverage (%)',
              scale = alt.Scale(domain = [90, 100])),
    color = trend_color
).properties(
    title = trend_title,
    width = 400)
# ---------- 2018 map ---------- #
# import cleaned data
ca_df = pd.read_csv(os.path.join(clean_folder, 'ca-2016-2018.csv'))
# drop NAs (< 20 are masked)
ca_df = ca_df.dropna(subset = ['COUNT'])
# group by county: total enrollment and total count per school year / county / category
ca_agg = (ca_df.groupby(['SCHOOL_YEAR', 'COUNTY', 'CATEGORY'])
          .agg(sum_enrollment = pd.NamedAgg(column = 'ENROLLMENT', aggfunc = 'sum'),
               sum_count = pd.NamedAgg(column = 'COUNT', aggfunc = 'sum'))
          .reset_index())
ca_agg['pct'] = ca_agg['sum_count'] / ca_agg['sum_enrollment']
# limit
ca_agg = ca_agg[ca_agg['CATEGORY'] == 'MMR']
# reshape wide (one row per county, one column per school year)
agg_wide = ca_agg.pivot(index = 'COUNTY', columns = 'SCHOOL_YEAR', values = 'pct')
# import california counties geojson; names are upper-cased before joining on COUNTY
ca_counties = gpd.read_file(os.path.join(clean_folder, 'california-counties.json'))
ca_counties['name'] = ca_counties['name'].str.upper()
ca_counties = ca_counties[['name', 'geometry']]
# merge datasets (left join keeps counties that have no coverage data)
merged = ca_counties.merge(agg_wide, left_on = 'name', right_on = 'COUNTY', how = 'left')
# group coverage rates into buckets; np.select takes the first matching
# condition, so each bucket includes its lower bound and a rate of exactly
# 0.85 / 0.90 / 0.95 is always assigned a category
coverage_2018 = merged['2018-2019']
merged['category'] = np.select(
    [coverage_2018.isna(),
     coverage_2018 < 0.85,
     coverage_2018 < 0.90,
     coverage_2018 < 0.95],
    ['Data unavailable',
     '<85%',
     '85-90%',
     '90-95%'],
    default = '95-100%')
# create map (named county_map so the builtin `map` is not shadowed)
county_map = alt.Chart(merged).mark_geoshape(
    stroke = '#ffffff',
    strokeWidth = 0.5
).encode(color = alt.Color('category:N',
                           legend = alt.Legend(title = None,
                                               orient = 'none',
                                               legendX = 200,
                                               legendY = 80),
                           scale = alt.Scale(domain = ['<85%',
                                                       '85-90%',
                                                       '90-95%',
                                                       '95-100%',
                                                       'Data unavailable'],
                                             range = ['#6e1614', '#a4201d', '#e9807d', '#f8d5d4', '#ececec']))
).properties(projection = {'type': 'albersUsa'},
             title = {'text': [''],
                      'fontSize': 5,
                      'subtitle': ['MMR Coverage by County, 2018-2019 School Year'],
                      'subtitleFontSize': 20,
                      'subtitleFont': 'Gill Sans'},
             width = 300)
# ---------- combine ---------- #
# trend chart and map side by side; each panel keeps its own color scale
alt.hconcat(chart, county_map
).resolve_scale(color = 'independent'
).configure_view(
    height = 400
).properties(
    title = {'text': ["MMR Coverage Has Improved – But Several Counties Still Have Low Rates"],
             'subtitle': ['Measles, Mumps, and Rubella (MMR) Vaccination Coverage Among Enrolled Kindergarteners'],
             'subtitleFontSize': 20,
             'subtitleFont': 'Gill Sans'}
)
Undoubtedly, an important contributor to low vaccination coverage rates is the rise in exemptions granted for mandatory vaccines. Most states allow medical and non-medical exemptions. Over time, the number of medical exemptions has remained fairly stable, but the number of non-medical exemptions has risen (i.e., from roughly 15 to over 22 per 1,000 children since 2011).
In 2016, California cracked down on non-medical exemptions by passing a law effectively removing personal beliefs as a permissible reason for vaccination exemption. By and large, this was effective in reducing the total number of exemptions – although there was an unsurprising (and highly suspicious) uptick in medical exemptions after this law was passed. Overall, this suggests that California's ban on non-medical exemptions may be an effective policy for other states to adopt.
#@title
# ---------- national ---------- #
# import cleaned data
cdc_exemptions_df = pd.read_csv(os.path.join(clean_folder, 'cdc-exemptions.csv'))
# aggregate states: sum the numeric columns per exemption type
# (numeric_only = True keeps text columns out of the sum)
grouped = cdc_exemptions_df.groupby('exemption').sum(numeric_only = True).reset_index()
# scale relative to enrollment
grouped['2011'] = grouped['ex_11-12'] / grouped['enroll_11-12']
grouped['2012'] = grouped['ex_12-13'] / grouped['enroll_12-13']
# NOTE(review): 2013 divides by 'enroll_03-14', not 'enroll_13-14' - confirm
# this matches the column header in cdc-exemptions.csv
grouped['2013'] = grouped['ex_13-14'] / grouped['enroll_03-14']
grouped['2014'] = grouped['ex_14-15'] / grouped['enroll_14-15']
grouped['2015'] = grouped['ex_15-16'] / grouped['enroll_15-16']
grouped['2016'] = grouped['ex_16-17'] / grouped['enroll_16-17']
grouped['2017'] = grouped['ex_17-18'] / grouped['enroll_17-18']
grouped['2018'] = grouped['ex_18-19'] / grouped['enroll_18-19']
# reshape and limit
reshaped = grouped.melt(id_vars = ['exemption'],
                        value_vars = ['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018'],
                        var_name = 'school-year',
                        value_name = 'pct-exempt')
# express the ratio per 1,000 children
reshaped['pct-exempt'] = reshaped['pct-exempt'] * 1000
# drop the 'Any' rows so only medical and non-medical are charted
reshaped = reshaped[reshaped['exemption'] != 'Any']
reshaped.loc[reshaped['exemption'] == 'Medical', 'exemption'] = 'Medical exemptions'
reshaped.loc[reshaped['exemption'] == 'Non-Medical', 'exemption'] = 'Non-medical exemptions'
# create chart
chart1 = alt.Chart(reshaped).mark_area().encode(
    x = alt.X('school-year:N', title = None),
    y = alt.Y('pct-exempt:Q', title = 'Exemptions per 1,000 Children'),
    color = alt.Color('exemption:N',
                      legend = alt.Legend(title = None,
                                          orient = 'none',
                                          legendX = 670,
                                          legendY = 8),
                      scale = alt.Scale(range = ['#cd8e35', '#a4201d']))
).properties(
    title = {'text': ['Non-Medical Vaccine Exemptions are Trending Upwards in the U.S.'],
             'subtitle': ['Enrolled Kindergarteners with One or More Vaccine Exemption, 2011-2018'],
             'subtitleFontSize': 20,
             'subtitleFont': 'Gill Sans'})
# ---------- california ---------- #
# import cleaned data (kept per state here, not aggregated)
grouped = pd.read_csv(os.path.join(clean_folder, 'cdc-exemptions.csv'))
# scale relative to enrollment
grouped['2011'] = grouped['ex_11-12'] / grouped['enroll_11-12']
grouped['2012'] = grouped['ex_12-13'] / grouped['enroll_12-13']
# NOTE(review): 2013 divides by 'enroll_03-14', not 'enroll_13-14' - confirm
# this matches the column header in cdc-exemptions.csv
grouped['2013'] = grouped['ex_13-14'] / grouped['enroll_03-14']
grouped['2014'] = grouped['ex_14-15'] / grouped['enroll_14-15']
grouped['2015'] = grouped['ex_15-16'] / grouped['enroll_15-16']
grouped['2016'] = grouped['ex_16-17'] / grouped['enroll_16-17']
grouped['2017'] = grouped['ex_17-18'] / grouped['enroll_17-18']
grouped['2018'] = grouped['ex_18-19'] / grouped['enroll_18-19']
# reshape and scale (per 1,000 children)
reshaped = grouped.melt(id_vars = ['state', 'exemption'],
                        value_vars = ['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018'],
                        var_name = 'school-year',
                        value_name = 'pct-exempt')
reshaped['pct-exempt'] = reshaped['pct-exempt'] * 1000
reshaped.loc[reshaped['exemption'] == 'Medical', 'exemption'] = 'Medical exemptions'
reshaped.loc[reshaped['exemption'] == 'Non-Medical', 'exemption'] = 'Non-medical exemptions'
# limit to California's medical / non-medical rows; missing values are filled with 0
reshaped = reshaped[(reshaped['exemption'] != 'Any') &
                    (reshaped['state'] == 'California')].fillna(0)
# vertical line: one row per annotation line, all at x = 2015.
# Built in one constructor call with scalar cell values (DataFrame.append was
# removed in pandas 2.0, and appending {'x': [2015], ...} stored lists in the cells).
vertical_line_df = pd.DataFrame({'x': [2015, 2015],
                                 'y': [29, 26.5],
                                 'label': ["California's SB-277 bans",
                                           'personal belief exemptions']})
vertical_line = alt.Chart(vertical_line_df
).mark_rule(
    color = '#9d9d9d',
    size = 1.5
).encode(
    x = 'x:N')
# add labels (annotation)
more_text = alt.Chart(vertical_line_df).mark_text(
    dx = 10,
    dy = -12,
    fontSize = 16,
    align = 'left',
    font = 'Gill Sans'
).encode(
    x = 'x:N',
    y = 'y:Q',
    text = 'label:N')
# create chart (title spelling corrected: 'Successful')
chart2 = alt.Chart(reshaped).mark_line(point = True).encode(
    x = alt.X('school-year:N', title = None),
    y = alt.Y('pct-exempt:Q', title = 'Exemptions per 1,000 Children'),
    color = alt.Color('exemption:N',
                      legend = alt.Legend(title = None),
                      scale = alt.Scale(range = ['#cd8e35', '#a4201d']))
).properties(
    title = {'text': ["... But California's 2016 Crackdown was (Mostly) Successful"],
             'subtitle': ["Enrolled Kindergarteners with One or More Vaccine Exemption in California, 2011-2018"],
             'subtitleFontSize': 20,
             'subtitleFont': 'Gill Sans'})
# rule first so the lines and annotation are drawn on top of it
chart3 = vertical_line + chart2 + more_text
# ---------- combine ---------- #
# national area chart above the California line chart, with a common view size
exemption_stack = alt.vconcat(chart1, chart3)
exemption_stack.configure_view(height = 200, width = 650)
Data Source: CDC School Year Vaccination Exemptions Trend Report
Despite this crackdown, there are still a shocking number of schools in California with alarmingly low vaccination rates. One particular charter school network accounts for a disproportionate share of these schools – Inspire Charter Schools – where parents pick their children's curricula. Several news outlets including Politico and Mother Jones have reported on the astonishing vaccination coverage rates of California's charter schools – many of which have coverage below 50%.
#@title
# ---------- wrangle ---------- #
# import cleaned data
ca_df = pd.read_csv(os.path.join(clean_folder, 'ca-2016-2018.csv'))
# drop NAs (< 20 are masked)
ca_df = ca_df.dropna(subset = ['COUNT'])
# convert to percentage
ca_df['pct'] = ca_df['COUNT'] / ca_df['ENROLLMENT'] * 100
# flag inspire charter schools (case-insensitive prefix match).
# na = False makes a missing school name count as "not Inspire"; without it the
# match yields NaN, which np.where treats as truthy.
is_inspire = ca_df['SCHOOL_NAME'].str.upper().str.startswith('INSPIRE CHARTER', na = False)
ca_df['inspire-charter'] = np.where(is_inspire,
                                    'Inspire Charter school',
                                    'All other schools')
# limit
limited = ca_df[(ca_df['CATEGORY'] == 'MMR')]
# ---------- chart ---------- #
# vertical line at 93% coverage: one row per annotation line.
# Built in one constructor call with scalar cell values (DataFrame.append was
# removed in pandas 2.0, and appending {'x': [93], ...} stored lists in the cells).
# Annotation spelling corrected: 'immunity'.
vertical_line_df = pd.DataFrame({'x': [93, 93, 93],
                                 'y': [1150, 1100, 1050],
                                 'label': ['93-95% of a community needs to be',
                                           'vaccinated to achieve herd immunity',
                                           'against measles']})
vertical_line = alt.Chart(vertical_line_df
).mark_rule(
    color = '#9d9d9d',
    size = 1.5
).encode(
    x = 'x:Q')
# add labels (annotation), shifted left of the rule by the negative dx
more_text = alt.Chart(vertical_line_df).mark_text(
    dx = -260,
    dy = 9,
    fontSize = 16,
    align = 'left',
    font = 'Gill Sans'
).encode(
    x = 'x:Q',
    y = 'y:Q',
    text = 'label:N')
# disable MaxRowsError
alt.data_transformers.disable_max_rows()
# make chart: one circle per row of `limited`, coverage vs. enrollment
chart1 = alt.Chart(limited).mark_circle(
).encode(
    x = alt.X('pct', title = 'MMR Vaccination Coverage (%)'),
    y = alt.Y('ENROLLMENT', title = 'Enrolled Kindergarteners'),
    color = alt.Color('inspire-charter:N',
                      title = None,
                      scale = alt.Scale(range = ['#a4201d', '#e4ceae']),
                      sort = ['Inspire Charter school'])
).properties(
    title = {'text': ["Never Send Your Child to an Inspire Charter School"],
             'subtitle': ['MMR Kindergarten Vaccination Coverage for California Schools, 2016-2019 School Years'],
             'subtitleFontSize': 20,
             'subtitleFont': 'Gill Sans'})
# add shape encoding: Inspire rows are redrawn as squares on top of the circles
chart2 = alt.Chart(limited).mark_square().encode(
    x = 'pct',
    y = 'ENROLLMENT',
    color = alt.Color('inspire-charter:N',
                      title = None,
                      scale = alt.Scale(range = ['#a4201d', '#e4ceae']),
                      sort = ['Inspire Charter school'])
).transform_filter(
    (alt.datum['inspire-charter'] == 'Inspire Charter school'))
# combine charts
(chart1 + chart2 + vertical_line + more_text
).configure_view(
    width = 650)